

rm(list = ls(all=TRUE))
cat("\f")

library(rootSolve)
library(nloptr)
library(entropy)
library(LaplacesDemon) # KLD function 

source("generate_data.R")
source("compute_nde.R") # { G-formula={Y,L,M} }
source("compute_mse.R") # includes {estimate_y} for reparam estimation of Y

set.seed(0)

# ################################################
# Data
# ################################################

# initialization 
n = 5000 

nde_0.vec <- nde_hat.vec <- nde_1.vec <- nde_2.vec <- nde_3.vec <- c()
mse_0.vec <- mse_1.vec <- mse_2.vec <- mse_3.vec  <- c()

trials = 100

for(t in 1:trials){
 
 cat("\n \n trial = ", t, "\n \n")
 
# generate data
data_obj = generate_data(n)
dat = data_obj$data
beta_y0 = data_obj$beta_y
beta_l0 = data_obj$beta_l
beta_m0 = data_obj$beta_m
beta_a0 = data_obj$beta_a

beta_0 = list(beta_y = beta_y0, 
              beta_l = beta_l0, 
              beta_m = beta_m0, 
              beta_a = beta_a0)

attach(dat, warn.conflicts = FALSE)

# generate test data 
n1 = floor(n*0.8) # 20% test data
idx_test = (n1+1):n
Y_test = dat$Y[idx_test]

# Formulas
fmla_y = as.formula(Y ~ C + A + M + L + AC + AM + AL + AML)
fmla_l = as.formula(L ~ C + A + M + AC + AM + ACM)
fmla_m = as.formula(M ~ C + A + AC)
fmla_a = as.formula(A ~ C)
fmla = list(fmla_y = fmla_y, fmla_l = fmla_l, fmla_m = fmla_m, fmla_a = fmla_a)


# ################################################
# (0) Unconstrained MLE 
# ################################################

source("0_unconstrainedMLE.R")

# Initialization
opt_0 = list(reparam = FALSE, estimator = "G-formula")
px_0 = rep(1/n, n)

# Fitting 
model_hat = optimize_unconst(dat, idx_test, fmla)
beta_hat = list(beta_a=model_hat$beta_a, beta_m=model_hat$beta_m, beta_l=model_hat$beta_l, beta_y=model_hat$beta_y)

# Evaluations
mse_0 = mean((Y[idx_test] - model_hat$Yhat[idx_test])^2)

# Effect
nde_hat = compute_effect(dat, beta_hat, px_0, opt_0)   

# True effect
nde_0 = compute_effect(dat, beta_0, px_0, opt_0) 


# ################################################
# (1) Constrained MLE, estimator="G-formula"
# ################################################

source("1_constrainedMLE.R")

# Initialization
opt_1 = list(reparam = FALSE, tau_l = -0.05, tau_u = 0.05)
func = compute_effect
px_1 = rep(1/n, n)
# beta_start_1 = NULL
beta_start_1 = c(beta_hat$beta_m, beta_hat$beta_l, beta_hat$beta_y)

# Fitting (train data)
model_1 = optimize_nloptr(dat[-idx_test,], func, beta_start_1, px_1[-idx_test], fmla, opt_1)
beta_1 = list(beta_a=model_1$beta_a, beta_m=model_1$beta_m, beta_l=model_1$beta_l, beta_y=model_1$beta_y)

# Evaluations (test data)
mse_1 = compute_mse(dat[idx_test,], beta_1, px_1[idx_test], opt_1)

# Effect
nde_1 = compute_effect(dat[-idx_test, ], beta_1, px_1[-idx_test], opt_1)


# ################################################
# (2) Reparameterized MLE, estimator="G-formula"
# ################################################

# source("2_reparamMLE.R")
source("2_reparam.R")

# Initialization
opt_2 = list(reparam = TRUE, tau_l = -0.05, tau_u = 0.05)
func = compute_effect
px_2 = rep(1/n, n)
beta_start_2 = NULL
fmla_f = as.formula(Y ~ -1 + C + M + L + AC + AM + AL + AML) # no "intercept" and no "A"
fmla_2 = c(fmla_f=fmla_f, fmla_l=fmla_l, fmla_m=fmla_m)

# Fitting (train data)
model_2 = optimize_reparam(dat[-idx_test, ], fmla_2, beta_start_2, func, px_2[-idx_test], opt_2)
beta_2 = list(beta_a=beta_hat$beta_a, beta_m=model_2$beta_m, beta_l=model_2$beta_l, beta_y=model_2$beta_y)

# Evaluations (test data)
mse_2 = compute_mse(dat[idx_test, ], beta_2, px_2[idx_test], opt_2)

# Effect
nde_2 = compute_effect(dat, beta_2, px_2, opt_2)


# ################################################
# (3) hybrid likelihood - Batch Prediction  
# ################################################

source("3_hybridMLE.R")

# Initialization
opt_3 = list(reparam = FALSE,
             alpha = 0.001, # alpha: initial step size
             threshold = 0.05, # threshold: stopping criterion
             max_iter = 10000, # max_iter: maximum number of iterations
             delta = 0.001) # delta: for numerical differentiation

# beta_start_3 = rep(0.1, length(beta_m0) + length(beta_y0))
beta_start_3 = c(beta_hat$beta_m, beta_hat$beta_l, beta_hat$beta_y)
# beta_start_3 = c(beta_1$beta_m, beta_1$beta_y)

# Fitting (train model)
model_3 = optimize_hybrid(beta_start_3, dat, fmla, opt_3)
px_3 = model_3$px
beta_3 = list(beta_m=model_3$beta_m, beta_l=model_3$beta_l, beta_y=model_3$beta_y, beta_a=beta_hat$beta_a)

# Evaluations (test data)
mse_3 = mean((Y[idx_test] - model_3$Y_hat[idx_test])^2)

# Effect 
nde_3 = compute_effect(dat, beta_3, px_3, opt_3)
nde_3_b = model_3$nde

# ################################################
# Gather the results
# ################################################

nde_0.vec <- c(nde_0.vec, nde_0)
nde_hat.vec <- c(nde_hat.vec, nde_hat)
nde_1.vec <- c(nde_1.vec, nde_1)
nde_2.vec <- c(nde_2.vec, nde_2)
nde_3.vec <- c(nde_3.vec, nde_3)

mse_0.vec <- c(mse_0.vec, mse_0)
mse_1.vec <- c(mse_1.vec, mse_1)
mse_2.vec <- c(mse_2.vec, mse_2)
mse_3.vec <- c(mse_3.vec, mse_3)
}

results = round(data.frame(nde_0 = nde_0.vec,
                           nde_hat = nde_hat.vec,
                           nde_1 = nde_1.vec,
                           nde_2 = nde_2.vec,
                           mse_0 = mse_0.vec,
                           mse_1 = mse_1.vec,
                           mse_2 = mse_2.vec,
                           mse_3 = mse_3.vec), 6)

write.csv(results, "results_sim3.csv", row.names = F)


